Section 4 AB-effects (metagenomes)

4.1 Load in variants

vcfToDataframe <- function(vcf.files, contig_mapping = contig_mapping, gff.df = gff.df) {
    require(vcfR)
    res <- list()
    for (file in vcf.files) {
        library(data.table)
        vcf.content <- vcfR::read.vcfR(file, verbose = FALSE)
        vcf.fix <- as.data.frame(vcf.content@fix) # contains chr, position and substitution informations
        vcf.info <- vcfR::INFO2df(vcf.content) # get INFO field, contains DP, AF informations
        if(nrow(vcf.fix) > 0) { # there are variants
            dat <-  as.data.frame(cbind(vcf.fix[,c(1, 2, 4, 5, 6)], vcf.info[,c(1, 2)]))
            dat$majorAF <- sapply(dat$AF, minorAfToMajorAf) # transforms e.g. AF of 0.1 to 0.9, 0.9 stays 0.9 and 0.5 stays 0.5
            dat$genome <- contig_mapping[match(dat$CHROM, contig_mapping$contig),]$genome # map chr information to genome name e.g. NHMU01000001.1 -> i48
            dat$genome_hr <- translateGenomeIdToFullName(tolower(dat$genome))
            dat$mouse.id <-  substr(tools::file_path_sans_ext(basename(file)), 1, 4)
            dat$mouse.group <- translateMouseIdToTreatmentGroup(dat$mouse.id)
            dat$day <- as.integer(substr(basename(file), 6, 7))
            dat$phase <- binDaysByPhase(as.numeric(as.matrix(dat$day)))
            dat$phase_num <- binDaysByPhaseGroup(dat$day)
            dat$dp <- as.numeric(as.matrix(vcf.info$DP))
            # annotate overlay of gene
            dt.gff <- data.table(start = gff.df$start, end = gff.df$end,
                chr = as.character(as.matrix(gff.df$chr)), feature = gff.df$product)
            colnames(dat)[1:2] <- c("chr", "start")
            dat$start <- as.integer(as.matrix(dat$start))
            dat$chr <- as.character(as.matrix(dat$chr))
            dat$end <- dat$start
            dat2 <- as.data.table(dat)
            setkey(dt.gff, chr, start, end)
            annotated <- foverlaps(dat2, dt.gff, type="within", mult="first")
            res[[tools::file_path_sans_ext(basename(file))]] <- annotated # add vcf df to list
        } else{
            message(paste("Skipping", file))
        }
    }
    df <- as.data.frame(do.call(rbind, res)) # merge list to df
    return(df)
}
## data/references/joined_reference_curated_ecoli/joined_reference_curated_ecoli.gff
## Skipping out_philipp/all_vcf/1683d09_S49.vcf

4.5 number of variants per samples

4.5.1 number of variants per treatment group

Figure 4.3: number of variants of all 12 OMM genomes by mouse

4.6 Heatmap

All mice

## Warning in dcast(dat, variant.id ~ sample.id, value.var = "AF"): The dcast generic in data.table has been passed a data.frame and will attempt to redirect to the reshape2::dcast; please note that
## reshape2 is deprecated, and this redirection is now deprecated as well. Please do this redirection yourself like reshape2::dcast(dat). In the next version, this warning will become an error.
data.wide[is.na(data.wide)] <- 0
rownames(data.wide) <-  data.wide$variant.id
data.wide$variant.id <- NULL

library(circlize)
library(ComplexHeatmap)

heat <- data.matrix(data.wide)
# limit to variants that are present in at least 10% of samples
heat_num <- rowSums(heat != 0)
heat2 <- heat[which(heat_num > ncol(heat)/10),]

# limit to variants that have a high variance
heat_var_num <- matrixStats::rowVars(heat2)
heat3 <- heat2[which(heat_var_num >  quantile(heat_var_num,  0.5)) ,]

dat$dummy <- 1
annot.data <- aggregate(dummy ~ mouse.id + mouse.group + day + phase, dat, sum)
annot.data$sample.id <- paste0(annot.data$mouse.id, "-",annot.data$day)
heat3.mouse.id <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.id
heat3.day <- annot.data[match(colnames(heat3), annot.data$sample.id),]$day
heat3.mouse.group <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.group
heat3.phase <- annot.data[match(colnames(heat3), annot.data$sample.id),]$phase
heat3.phase2 <- ifelse(heat3.phase == "post-treatment", 6, NA)
ord = data.frame(day = heat3.day, mouse.id =heat3.mouse.id )

occ = as.data.frame(table(heat3.mouse.id))
ord$occ <- occ[match(ord$mouse.id, occ$heat3.mouse.id),]$Freq
data.wide.sub <- dat[match(colnames(heat3), dat$sample.id),]

col_fun = colorRamp2(c(0, 0.5, 1), c("white", "yellow", "red"))

qpcr <- read.table("qpcr.csv", header = T, sep = ";")
qpcr$universal <- NULL
rownames(qpcr) <- paste0(qpcr$mouse, "-",qpcr$day)
qpcr <- qpcr[,-c(1:5)]

qpcr <- apply(qpcr, 1, function(x) x/sum(x))

qpcr <- qpcr[,which(colnames(qpcr) %in% colnames(heat3))]
qpcr <- qpcr[,match(colnames(heat3), colnames(qpcr))]

#pdf("heat.pdf", width= 10, height = 10)
Heatmap(heat3, name = "AF", col = col_fun, border = TRUE,
top_annotation = HeatmapAnnotation(num = anno_lines(colSums(heat3),
smooth = TRUE,border = TRUE), ra = anno_barplot(t(qpcr), 
    bar_width = 1,gp = gpar(fill = 1:12), height = unit(3, "cm")),
    mouse = heat3.mouse.id,
    group = heat3.mouse.group,
    phase = heat3.phase,
day=anno_simple(heat3.day, pch =heat3.phase2)),
cluster_columns =F,
column_order = order(ord$occ, ord$mouse.id, ord$day),
right_annotation = rowAnnotation(prev = anno_barplot(rowSums(heat3))),
 row_gap = unit(0, "mm"), column_gap = unit(0, "mm"),
column_split = heat3.mouse.group,
 column_names_gp = gpar(fontsize =5),
 row_names_gp = gpar(fontsize = 3),
 show_row_dend = F,
  show_row_names = F,
 show_column_dend = F
)

4.7 with information if SNP was observed in resequencing

all high-variant

## Warning in dcast(dat, variant.id ~ sample.id, value.var = "AF"): The dcast generic in data.table has been passed a data.frame and will attempt to redirect to the reshape2::dcast; please note that
## reshape2 is deprecated, and this redirection is now deprecated as well. Please do this redirection yourself like reshape2::dcast(dat). In the next version, this warning will become an error.
data.wide[is.na(data.wide)] <- 0
rownames(data.wide) <-  data.wide$variant.id
data.wide$variant.id <- NULL

library(circlize)
library(ComplexHeatmap)

heat <- data.matrix(data.wide)
# limit to variants that are present in at least 10% of samples
heat_num <- rowSums(heat != 0)
heat2 <- heat[which(heat_num > ncol(heat)/10),]

# limit to variants that have a high variance
heat_var_num <- matrixStats::rowVars(heat2)
heat3 <- heat2[which(heat_var_num >  quantile(heat_var_num,  0.5)) ,]

dat$dummy <- 1
annot.data <- aggregate(dummy ~ mouse.id + mouse.group + day + phase, dat, sum)
annot.data$sample.id <- paste0(annot.data$mouse.id, "-",annot.data$day)
heat3.mouse.id <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.id
heat3.day <- annot.data[match(colnames(heat3), annot.data$sample.id),]$day
heat3.mouse.group <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.group
heat3.phase <- annot.data[match(colnames(heat3), annot.data$sample.id),]$phase
heat3.phase2 <- ifelse(heat3.phase == "post-treatment", 6, NA)
ord = data.frame(day = heat3.day, mouse.id =heat3.mouse.id )

occ = as.data.frame(table(heat3.mouse.id))
ord$occ <- occ[match(ord$mouse.id, occ$heat3.mouse.id),]$Freq
data.wide.sub <- dat[match(colnames(heat3), dat$sample.id),]

col_fun = colorRamp2(c(0, 0.5, 1), c("white", "yellow", "red"))

qpcr <- read.table("qpcr.csv", header = T, sep = ";")
qpcr$universal <- NULL
rownames(qpcr) <- paste0(qpcr$mouse, "-",qpcr$day)
qpcr <- qpcr[,-c(1:5)]
qpcr <- apply(qpcr, 1, function(x) x/sum(x))
qpcr <- qpcr[,which(colnames(qpcr) %in% colnames(heat3))]
qpcr <- qpcr[,match(colnames(heat3), colnames(qpcr))]
bug <- sapply(strsplit(rownames(heat3), split='-', fixed=TRUE), `[`, 1) 
fixed <- sapply(strsplit(rownames(heat3), split='-', fixed=TRUE), `[`, 2) 

pdf("heat.pdf", width= 10, height = 10)
Heatmap(heat3, name = "AF", col = col_fun, border = TRUE,
top_annotation = HeatmapAnnotation(num = anno_lines(colSums(heat3),
smooth = TRUE,border = TRUE), ra = anno_barplot(t(qpcr), 
    bar_width = 1,gp = gpar(fill = 1:12), height = unit(3, "cm")),
    mouse = heat3.mouse.id,
    group = heat3.mouse.group,
    phase = heat3.phase,
day=anno_simple(heat3.day, pch =heat3.phase2)),
cluster_columns =F,
column_order = order(ord$occ, ord$mouse.id, ord$day),
right_annotation = rowAnnotation(fixed = fixed, bug = bug,  prev = anno_barplot(rowSums(heat3)), col = bugcolors),
 row_gap = unit(0, "mm"), column_gap = unit(0, "mm"),
column_split = heat3.mouse.group,
 column_names_gp = gpar(fontsize =5),
 row_names_gp = gpar(fontsize = 3),
 show_row_dend = F,
  show_row_names = F,
 show_column_dend = F
)
dev.off()
## pdf 
##   2

All that are fixed

## Warning in dcast(dat, variant.id ~ sample.id, value.var = "AF"): The dcast generic in data.table has been passed a data.frame and will attempt to redirect to the reshape2::dcast; please note that
## reshape2 is deprecated, and this redirection is now deprecated as well. Please do this redirection yourself like reshape2::dcast(dat). In the next version, this warning will become an error.
data.wide[is.na(data.wide)] <- 0
rownames(data.wide) <-  data.wide$variant.id
data.wide$variant.id <- NULL

library(circlize)
library(ComplexHeatmap)

heat <- data.matrix(data.wide)
# limit to variants that are present in at least 10% of samples
heat_num <- rowSums(heat != 0)
heat2 <- heat[which(heat_num > ncol(heat)/10),]

# limit to variants that have a high variance
heat_var_num <- matrixStats::rowVars(heat2)
heat3 <- heat2[which(heat_var_num >  quantile(heat_var_num,  0.5)) ,]
heat3 <- heat
dat$dummy <- 1
annot.data <- aggregate(dummy ~ mouse.id + mouse.group + day + phase, dat, sum)
annot.data$sample.id <- paste0(annot.data$mouse.id, "-",annot.data$day)
heat3.mouse.id <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.id
heat3.day <- annot.data[match(colnames(heat3), annot.data$sample.id),]$day
heat3.mouse.group <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.group
heat3.phase <- annot.data[match(colnames(heat3), annot.data$sample.id),]$phase
heat3.phase2 <- ifelse(heat3.phase == "post-treatment", 6, NA)
ord = data.frame(day = heat3.day, mouse.id =heat3.mouse.id )

occ = as.data.frame(table(heat3.mouse.id))
ord$occ <- occ[match(ord$mouse.id, occ$heat3.mouse.id),]$Freq
data.wide.sub <- dat[match(colnames(heat3), dat$sample.id),]

col_fun = colorRamp2(c(0, 0.5, 1), c("white", "yellow", "red"))

qpcr <- read.table("qpcr.csv", header = T, sep = ";")
qpcr$universal <- NULL
rownames(qpcr) <- paste0(qpcr$mouse, "-",qpcr$day)
qpcr <- qpcr[,-c(1:5)]
qpcr <- apply(qpcr, 1, function(x) x/sum(x))
qpcr <- qpcr[,which(colnames(qpcr) %in% colnames(heat3))]
qpcr <- qpcr[,match(colnames(heat3), colnames(qpcr))]
bug <- sapply(strsplit(rownames(heat3), split='-', fixed=TRUE), `[`, 1) 
fixed <- sapply(strsplit(rownames(heat3), split='-', fixed=TRUE), `[`, 2) 

Heatmap(heat3, name = "AF", col = col_fun, border = TRUE,
column_order = order(ord$occ, ord$mouse.id, ord$day),cluster_columns =F,
right_annotation = rowAnnotation(fixed = fixed, bug = bug,  prev = anno_barplot(rowSums(heat3)), col = bugcolors),
 row_gap = unit(0, "mm"), column_gap = unit(0, "mm"),
 top_annotation = HeatmapAnnotation(num = anno_lines(colSums(heat3),
smooth = TRUE,border = TRUE), ra = anno_barplot(t(qpcr), 
    bar_width = 1,gp = gpar(fill = 1:12), height = unit(3, "cm")),
    mouse = heat3.mouse.id,
    group = heat3.mouse.group,
    phase = heat3.phase,
day=anno_simple(heat3.day, pch =heat3.phase2)),
 column_names_gp = gpar(fontsize =5),
 row_names_gp = gpar(fontsize = 3),
 show_row_dend = F,
  show_row_names = F,
 show_column_dend = F

)

All mice clustered

## Warning in dcast(dat, variant.id ~ sample.id, value.var = "AF"): The dcast generic in data.table has been passed a data.frame and will attempt to redirect to the reshape2::dcast; please note that
## reshape2 is deprecated, and this redirection is now deprecated as well. Please do this redirection yourself like reshape2::dcast(dat). In the next version, this warning will become an error.
data.wide[is.na(data.wide)] <- 0
rownames(data.wide) <-  data.wide$variant.id
data.wide$variant.id <- NULL

library(circlize)
library(ComplexHeatmap)

heat <- data.matrix(data.wide)
# limit to variants that are present in at least 10% of samples
heat_num <- rowSums(heat != 0)
heat2 <- heat[which(heat_num > ncol(heat)/10),]

# limit to variants that have a high variance
heat_var_num <- matrixStats::rowVars(heat2)
heat3 <- heat2[which(heat_var_num >  quantile(heat_var_num,  0.5)) ,]

dat$dummy <- 1
annot.data <- aggregate(dummy ~ mouse.id + mouse.group + day + phase, dat, sum)
annot.data$sample.id <- paste0(annot.data$mouse.id, "-",annot.data$day)
heat3.mouse.id <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.id
heat3.day <- annot.data[match(colnames(heat3), annot.data$sample.id),]$day
heat3.mouse.group <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.group
heat3.phase <- annot.data[match(colnames(heat3), annot.data$sample.id),]$phase
heat3.phase2 <- ifelse(heat3.phase == "post-treatment", 6, NA)
ord = data.frame(day = heat3.day, mouse.id =heat3.mouse.id )

occ = as.data.frame(table(heat3.mouse.id))
ord$occ <- occ[match(ord$mouse.id, occ$heat3.mouse.id),]$Freq
data.wide.sub <- dat[match(colnames(heat3), dat$sample.id),]

col_fun = colorRamp2(c(0, 0.5, 1), c("white", "yellow", "red"))

qpcr <- read.table("qpcr.csv", header = T, sep = ";")
qpcr$universal <- NULL
rownames(qpcr) <- paste0(qpcr$mouse, "-",qpcr$day)
qpcr <- qpcr[,-c(1:5)]

qpcr <- apply(qpcr, 1, function(x) x/sum(x))

qpcr <- qpcr[,which(colnames(qpcr) %in% colnames(heat3))]
qpcr <- qpcr[,match(colnames(heat3), colnames(qpcr))]

#pdf("heat.pdf", width= 10, height = 10)
Heatmap(heat3, name = "AF", col = col_fun, border = TRUE,
top_annotation = HeatmapAnnotation(num = anno_lines(colSums(heat3),
smooth = TRUE,border = TRUE), ra = anno_barplot(t(qpcr), 
    bar_width = 1,gp = gpar(fill = 1:12), height = unit(3, "cm")),
    mouse = heat3.mouse.id,
    group = heat3.mouse.group,
    phase = heat3.phase,
day=anno_simple(heat3.day, pch =heat3.phase2)),
cluster_columns =T,
right_annotation = rowAnnotation(prev = anno_barplot(rowSums(heat3))),
 row_gap = unit(0, "mm"), column_gap = unit(0, "mm"),
 column_names_gp = gpar(fontsize =5),
 row_names_gp = gpar(fontsize = 3),
 show_row_dend = F,
  show_row_names = F,
 show_column_dend = T
)

## Warning in dcast(dat, variant.id ~ sample.id, value.var = "AF"): The dcast generic in data.table has been passed a data.frame and will attempt to redirect to the reshape2::dcast; please note that
## reshape2 is deprecated, and this redirection is now deprecated as well. Please do this redirection yourself like reshape2::dcast(dat). In the next version, this warning will become an error.
data.wide[is.na(data.wide)] <- 0
rownames(data.wide) <-  data.wide$variant.id
data.wide$variant.id <- NULL

library(circlize)
library(ComplexHeatmap)

heat <- data.matrix(data.wide)
# limit to variants that are present in at least 10% of samples
heat_num <- rowSums(heat != 0)
heat2 <- heat[which(heat_num > ncol(heat)/10),]

# limit to variants that have a high variance
heat_var_num <- matrixStats::rowVars(heat2)
heat3 <- heat2[which(heat_var_num >  quantile(heat_var_num,  0.5)) ,]

dat$dummy <- 1
annot.data <- aggregate(dummy ~ mouse.id + mouse.group + day + phase, dat, sum)
annot.data$sample.id <- paste0(annot.data$mouse.id, "-",annot.data$day)
heat3.mouse.id <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.id
heat3.day <- annot.data[match(colnames(heat3), annot.data$sample.id),]$day
heat3.mouse.group <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.group
heat3.phase <- annot.data[match(colnames(heat3), annot.data$sample.id),]$phase
heat3.phase2 <- ifelse(heat3.phase == "post-treatment", 6, NA)
ord = data.frame(day = heat3.day, mouse.id =heat3.mouse.id )

occ = as.data.frame(table(heat3.mouse.id))
ord$occ <- occ[match(ord$mouse.id, occ$heat3.mouse.id),]$Freq
data.wide.sub <- dat[match(colnames(heat3), dat$sample.id),]

col_fun = colorRamp2(c(0, 0.5, 1), c("white", "yellow", "red"))

qpcr <- read.table("qpcr.csv", header = T, sep = ";")
qpcr$universal <- NULL
rownames(qpcr) <- paste0(qpcr$mouse, "-",qpcr$day)
qpcr <- qpcr[,-c(1:5)]

qpcr <- t(qpcr)

qpcr <- qpcr[,which(colnames(qpcr) %in% colnames(heat3))]
qpcr <- qpcr[,match(colnames(heat3), colnames(qpcr))]

#pdf("heat.pdf", width= 10, height = 10)
Heatmap(heat3, name = "AF", col = col_fun, border = TRUE,
top_annotation = HeatmapAnnotation(num = anno_lines(colSums(heat3),
smooth = TRUE,border = TRUE), ra = anno_barplot(t(qpcr), 
    bar_width = 1,gp = gpar(fill = 1:12), height = unit(3, "cm")),
    mouse = heat3.mouse.id,
    group = heat3.mouse.group,
    phase = heat3.phase,
day=anno_simple(heat3.day, pch =heat3.phase2)),
cluster_columns =T,
right_annotation = rowAnnotation(prev = anno_barplot(rowSums(heat3))),
 row_gap = unit(0, "mm"), column_gap = unit(0, "mm"),
 column_names_gp = gpar(fontsize =5),
 row_names_gp = gpar(fontsize = 3),
 show_row_dend = F,
  show_row_names = F,
 show_column_dend = T
)

4.8 Focus on mouse where we have many time points

## Warning in dcast(dat, variant.id ~ sample.id, value.var = "AF"): The dcast generic in data.table has been passed a data.frame and will attempt to redirect to the reshape2::dcast; please note that
## reshape2 is deprecated, and this redirection is now deprecated as well. Please do this redirection yourself like reshape2::dcast(dat). In the next version, this warning will become an error.
data.wide[is.na(data.wide)] <- 0
rownames(data.wide) <-  data.wide$variant.id
data.wide$variant.id <- NULL

heat <- data.matrix(data.wide)
# limit to variants that are present in at least 10% of samples
heat_num <- rowSums(heat != 0)
heat2 <- heat[which(heat_num > ncol(heat)/10),]

# limit to variants that have a high variance
heat_var_num <- matrixStats::rowVars(heat2)
heat3 <- heat2[which(heat_var_num >  quantile(heat_var_num,  0.50)) ,]

dat$dummy <- 1
annot.data <- aggregate(dummy ~ mouse.id + mouse.group + day + phase, dat, sum)
annot.data$sample.id <- paste0(annot.data$mouse.id, "-",annot.data$day)

heat3.mouse.id <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.id
heat3.day <- annot.data[match(colnames(heat3), annot.data$sample.id),]$day

ord = data.frame(day = heat3.day, mouse.id =heat3.mouse.id )

heat3.mouse.group <- annot.data[match(colnames(heat3), annot.data$sample.id),]$mouse.group
heat3.phase <- annot.data[match(colnames(heat3), annot.data$sample.id),]$phase
heat3.phase2 <- ifelse(heat3.phase == "post-treatment", 6, NA)

col_fun = colorRamp2(c(0, 0.5, 1), c("white", "yellow", "red"))

# order the heatmap by treatment group

Heatmap(heat3, name = "AF", col = col_fun, border = TRUE,
top_annotation = HeatmapAnnotation(num = anno_lines(colSums(heat3),
smooth = TRUE,border = TRUE),
day=anno_simple(heat3.day, pch =heat3.phase2 )),
cluster_columns =F,
column_order = order(ord$mouse.id, ord$day),
column_split = heat3.mouse.group,
 column_names_gp = gpar(fontsize =5),
 row_names_gp = gpar(fontsize = 8),
 show_row_dend = F,
  show_row_names = F,
 show_column_dend = F
)

4.9 Akkermansia Muciniphila

4.9.1 area plot 1

## Warning in dcast(dat, day + mouse.id + mouse.group ~ variant.id, value.var = "AF"): The dcast generic in data.table has been passed a data.frame and will attempt to redirect to the reshape2::dcast;
## please note that reshape2 is deprecated, and this redirection is now deprecated as well. Please do this redirection yourself like reshape2::dcast(dat). In the next version, this warning will become an
## error.
## Warning in melt(data.wide.reduced, id.vars = c("day", "mouse.id", "mouse.group")): The melt generic in data.table has been passed a data.frame and will attempt to redirect to the relevant reshape2
## method; please note that reshape2 is deprecated, and this redirection is now deprecated as well. To continue using melt methods from reshape2 while both libraries are attached, e.g. melt.list, you can
## prepend the namespace like reshape2::melt(data.wide.reduced). In the next version, this warning will become an error.

4.9.2 line plot

## Warning in dcast(dat, day + mouse.id + mouse.group ~ variant.id, value.var = "AF"): The dcast generic in data.table has been passed a data.frame and will attempt to redirect to the reshape2::dcast;
## please note that reshape2 is deprecated, and this redirection is now deprecated as well. Please do this redirection yourself like reshape2::dcast(dat). In the next version, this warning will become an
## error.
## Warning in melt(data.wide, id.vars = c("day", "mouse.id", "mouse.group")): The melt generic in data.table has been passed a data.frame and will attempt to redirect to the relevant reshape2 method;
## please note that reshape2 is deprecated, and this redirection is now deprecated as well. To continue using melt methods from reshape2 while both libraries are attached, e.g. melt.list, you can prepend
## the namespace like reshape2::melt(data.wide). In the next version, this warning will become an error.